#include <cstring>

#include "nanovoid.h"


// Strict-weak-ordering comparator over hash-pointer ids.
//
// Two ids are ordered by a byte-wise memcmp of the lsh_hash_code arrays stored
// in the referenced HashPointerBank, so that after sorting, ids with an
// identical LSH code end up adjacent to each other.
struct HpLSHCMP {
  HashPointerBank* hp;  // bank the ids index into; not owned
  uint size;            // number of bytes compared per code: getK() * sizeof(int)
  
  HpLSHCMP(HashPointerBank* _hp) :
    hp(_hp), size((_hp->getK())*sizeof(int))
  {}

  // Returns true when the LSH code of id `i` sorts before that of id `j`.
  // Marked const: the comparison never mutates the comparator itself, and a
  // const call operator lets the functor be used through const references.
  bool operator() (uint i, uint j) const {
    return (memcmp((hp->d[i]).lsh_hash_code, (hp->d[j]).lsh_hash_code, size) < 0);
  }
};

// Collapses hash pointers that carry an identical LSH code.
//
// For every column `cl` and every bucket list in hash_t.l[cl], the ids are
// sorted by LSH code (HpLSHCMP) so equal codes become adjacent.  Each run of
// equal codes is then folded into its first element ("col2", the survivor):
//   * if the two hash pointers reference different PNBuckets, those buckets
//     are merged (item2hp lists, hp->pnb back pointers, PNBucket contents);
//   * the duplicate hash pointer ("col") is removed from the item2hp list of
//     column `cl` and released via hash_t.hp.free_elem.
// The surviving ids are compacted to the front of the vector in place and the
// tail is erased at the end.
void OneStep::merge_with_same_lsh_v2() {
  // merging hash buckets with the same lsh code in each column
  uint cl = 0;
  vector< vector< uint> >::iterator it;
  HpLSHCMP hp_lsh_cmp(&(hash_t.hp));
  for ( ; cl < L; ++ cl) {
    for (it = hash_t.l[cl].begin(); it != hash_t.l[cl].end(); ++ it) {
      // `it->begin() + 1` below requires at least one element.
      if (it->size() == 0)
        continue;

      // Bring ids with identical LSH codes next to each other.
      sort(it->begin(), it->end(), hp_lsh_cmp);
      
      // col2: write cursor / current survivor.  col: read cursor.
      vector< uint >::iterator col2 = it->begin();
      HashPointer* t_hp_col2 = &(hash_t.hp.d[*col2]);
      vector< uint >::iterator col = it->begin() + 1;
      HashPointer* t_hp_col;
      while (col != it->end()) {
        // combine as many buckets as possible into the current survivor
        while (col != it->end()) {
          t_hp_col = &(hash_t.hp.d[*col]);
          
          // A different code ends the current run of duplicates.
          if (memcmp(t_hp_col2->lsh_hash_code, t_hp_col->lsh_hash_code, sizeof(int)*K)!=0)
            break;
          
          // combine the two buckets
          if (t_hp_col2->pnb != t_hp_col->pnb) {
            // The duplicate belongs to another PNBucket: merge that bucket
            // into the survivor's bucket.
            uint cll = 0, from_id, to_id;
            uint ori_root_item = pnb.d[t_hp_col2->pnb].p_list;
            uint col_root_item = pnb.d[t_hp_col->pnb].p_list;
            for (cll = 0; cll < L; ++ cll) {
              to_id = item2hp_id[ori_root_item][cll];
              from_id = item2hp_id[col_root_item][cll];
              
              // Re-point every hash pointer of the absorbed bucket at the
              // survivor's PNBucket.  In the current column *col itself is
              // skipped so that t_hp_col->pnb keeps naming the absorbed bucket
              // for the merge_with / free_elem calls further down.
              uint bit = item2hp_hash.head(from_id);
              if (cll == cl) {
                while (bit != UINT_NULL) {
                  uint hp_it = item2hp_hash.bit2c(bit);
                  if (hp_it != *col)
                    hash_t.hp.d[hp_it].pnb = t_hp_col2->pnb;
                  bit = item2hp_hash.bit2right(bit);
                }
              }else{
                while (bit != UINT_NULL) {
                  uint hp_it = item2hp_hash.bit2c(bit);
                  hash_t.hp.d[hp_it].pnb = t_hp_col2->pnb;
                  bit = item2hp_hash.bit2right(bit);
                }
              }
              // Append the absorbed bucket's per-column list to the survivor's.
              item2hp_hash.move_from_id_to_id(from_id, to_id);
            }

            // When the survivor's set is reported smaller than the absorbed
            // one, new_v at root_a is overwritten from root_b before merging.
            // NOTE(review): presumably merge_with keeps the larger set's root
            // as representative -- confirm against PNBucket::merge_with.
            uint root_a = (pnb.d[t_hp_col2->pnb].p_list);
            uint root_b = (pnb.d[t_hp_col->pnb].p_list);
            if (inv.d_size(inv.item2pd[root_a]) < inv.d_size(inv.item2pd[root_b])) {
              assign_vals(new_v, root_b, new_v, root_a);
            }
            
            pnb.d[t_hp_col2->pnb].merge_with(&pnb.d[t_hp_col->pnb], pnb.n_list_hash, inv);

            // Drop the duplicate hash pointer from the merged list of this
            // column.  `to_id` is only reused here as scratch storage for the
            // third (output) argument of find().
            from_id = item2hp_hash.find(item2hp_id[ori_root_item][cl], *col, to_id);
            assert(from_id != UINT_NULL);
            item2hp_hash.delete_(from_id);
            
            // (The hp -> pnb back pointers were already updated in the loop
            // above, so no second pass over the lists is needed.)
            // If the merge changed the representative item, the per-column
            // lists must follow it to the new representative.
            uint root_item = pnb.d[t_hp_col2->pnb].p_list;
            if (root_item != ori_root_item) {
              for (cll = 0; cll < L; ++ cll) {
                from_id = item2hp_id[ori_root_item][cll];
                to_id = item2hp_id[root_item][cll];
                item2hp_hash.clear(to_id);
                item2hp_hash.move_from_id_to_id(from_id, to_id);
              }
            }
            // t_hp_col->pnb still holds the absorbed bucket's id (see above).
            pnb.free_elem(t_hp_col->pnb);
          }else{
            // Both pointers already share a PNBucket: only the duplicate
            // entry in this column's item2hp list has to go.
            uint root_item = pnb.d[t_hp_col2->pnb].p_list;
            uint hash;
            uint bit = item2hp_hash.find(item2hp_id[root_item][cl], *col, hash);
            assert(bit != UINT_NULL);
            item2hp_hash.delete_(bit);
          }

          // The duplicate hash pointer is no longer referenced anywhere.
          hash_t.hp.free_elem(*col);
          
          ++col;
        }
        // A new run starts at *col: move it right behind the last survivor
        // and make it the survivor of the next run.
        if (col != it->end()) {
          ++col2;
          *col2 = *col;
          t_hp_col2 = &(hash_t.hp.d[*col2]);          
          ++col;
        }
      }
      // Everything after the last survivor is stale (merged or moved forward).
      it->erase(col2+1, it->end());
    }    
  }  
}


/*

void OneStep::merge_with_same_lsh() {
  // merging hash buckets with the same lsh code in each column
  uint cl = 0;
  vector< vector< uint> >::iterator it;
  for (cl = 0; cl < L; ++ cl) {
    for (it = hash_t.l[cl].begin(); it != hash_t.l[cl].end(); ++ it) {
      vector< uint >::iterator col = it->begin();
      while (col != it->end()) {
        HashPointer* t_hp_col = &(hash_t.hp.d[*col]);
        vector< uint >::iterator col2; 
        for (col2 = it->begin(); col2 != col; ++ col2)
          if (memcmp(hash_t.hp.d[*col2].lsh_hash_code, t_hp_col->lsh_hash_code,\
                     sizeof(int)*K) == 0)
            break;                            
        if (col2 != col){
          // need merge
          HashPointer* t_hp_col2 = &(hash_t.hp.d[*col2]);
          //printf("col2= %u col=%u t_hp_col2->pnb=%u t_hp_col->pnb=%u\n", *col2, *col, \
                 t_hp_col2->pnb, t_hp_col->pnb);
         
          fflush(stdout);
          if (t_hp_col2->pnb != t_hp_col->pnb) {
            // need merge the PNBuckets
            set<uint> t_hps[L];
            uint cll = 0;
            uint root_item = pnb.d[t_hp_col2->pnb].p_list;
            for ( ; cll < L; ++ cll)
              t_hps[cll].insert(item2hp[root_item][cll].begin(), \
                                item2hp[root_item][cll].end());
            // need to merge with t_hp_col->pnb's item2hp!!!
            uint col_root_item = pnb.d[t_hp_col->pnb].p_list;
            for (cll = 0; cll < L; ++ cll)
              t_hps[cll].insert(item2hp[col_root_item][cll].begin(), \
                                item2hp[col_root_item][cll].end());
            //printf("finish building t_hps\n");
            //fflush(stdout);

            uint root_a = (pnb.d[t_hp_col2->pnb].p_list);
            uint root_b = (pnb.d[t_hp_col->pnb].p_list);
            if (inv.d_size(inv.item2pd[root_a]) < inv.d_size(inv.item2pd[root_b])) {
              assign_vals(new_v, root_b, new_v, root_a);
            }
            
            pnb.d[t_hp_col2->pnb].merge_with(&pnb.d[t_hp_col->pnb], pnb.n_list_hash, inv);
            //inv.check_from_dfslist(num_items);
            //also need to update the new_v ( do not think so, because merge will preserve one of the roots.
            //item2hp[pnb.d[t_hp_col2->pnb].p_list][cl] = (*col2);  // TODO, xyx: needs to update for all columns, right??
            //printf("finish merge with\n");
            //fflush(stdout);
            
            t_hps[cl].erase(*col);
            // also need to update the pointer from hp to pnb!!!
            uint t_hp_col2_pnb = t_hp_col2->pnb;
            for (cll = 0; cll < L; ++ cll) {
              set< uint >::iterator hp_it = t_hps[cll].begin();
              for ( ; hp_it != t_hps[cll].end(); ++ hp_it) {
                //printf("  *hp_it = %u, t_hp_col2_pnb = %u\n", *hp_it, t_hp_col2_pnb);
                hash_t.hp.d[*hp_it].pnb = t_hp_col2_pnb;
              }
            }
            root_item = pnb.d[t_hp_col2->pnb].p_list;
            for (cll = 0; cll < L; ++ cll) {
              item2hp[root_item][cll].clear();
              item2hp[root_item][cll].insert(t_hps[cll].begin(), t_hps[cll].end());
            }
            pnb.free_elem(t_hp_col->pnb);
          }else{
            //printf("pnb agrees\n");
            //fflush(stdout);
            //uint cll = 0;
            uint root_item = pnb.d[t_hp_col2->pnb].p_list;
       
            //////////// this part was commented out ////////////////
            uint col_root_item = pnb.d[t_hp_col->pnb].p_list;
            for (cll = 0; cll < L; ++ cll) {
              item2hp[root_item][cll].insert(item2hp[col_root_item][cll].begin(), \
                                             item2hp[col_root_item][cll].end());
            }
            //////////// end of this part was commented out ////////////////

            item2hp[root_item][cl].erase(*col);
          }

          hash_t.hp.free_elem(*col);
          col = it->erase(col);
        }else
          ++ col;
      }
    }
  }
}

*/

void OneStep::next() {
  valueType vals[vals_len];
  vector< uint > active_list;

  //printf("one-step-eval\n");
  //check_non_empty_item2hp();
  // do one step of evaluation
  vector< vector< uint > >::iterator it;
  for (it = hash_t.l[0].begin(); it != hash_t.l[0].end(); ++ it) {
    vector< uint >::iterator itt;
    for (itt = it->begin(); itt != it->end(); ++ itt) {
      uint p_pnb = hash_t.hp.d[*itt].pnb;
      grab_vals(pnb.d[p_pnb].p_list, old_v, vals);
      forward_one_step(vals, pnb.d[p_pnb].p_list, new_v);
      pnb.d[p_pnb].at_least_one_lsh_changed = false;
    }
  }
  //inv.check_from_dfslist(num_items);

  //printf("move hash buckets across different hash_t.l entries\n");
  //check_non_empty_item2hp();
  // move hash buckets across different hash_t.l entries
  int new_lsh_hash_code[K];
  uint cl = 0;
  for (cl = 0; cl < L; ++ cl) {
    uint lid = 0;
    uint hash_t_size = hash_t.l[cl].size();
    for ( ; lid < hash_t_size; ++ lid) {
      vector< uint >::iterator itt = hash_t.l[cl][lid].begin();
      //printf("l.id=%u it->size()=%lu\n", lid, hash_t.l[lid].size());
      while (itt != hash_t.l[cl][lid].end()) {
        HashPointer* t_hp = &(hash_t.hp.d[*itt]);
        uint p_pnb = t_hp->pnb;
        grab_vals(pnb.d[p_pnb].p_list, new_v, vals);

        lsh.lsh(vals, cl, new_lsh_hash_code);
        uint new_hash_code = hash_t.hash_from_lsh(new_lsh_hash_code);
        //printf("new_lsh_hash_code=%d new_hash_code=%u\n", new_lsh_hash_code, new_hash_code);
        //printf("(*itt)->lsh_hash_code=%d (*itt)->hash_code=%u\n", (*itt)->lsh_hash_code, (*itt)->hash_code);
      
        if (memcmp(new_lsh_hash_code, t_hp->lsh_hash_code, sizeof(int)*K) == 0) {
          // nothing needs to be done; move to next hash bucket
          ++ itt;
        }else{
          // lsh code has changed;
          //(*itt)->lsh_hash_code = new_lsh_hash_code;
          memcpy(t_hp->lsh_hash_code, new_lsh_hash_code, sizeof(int)*K);
          // put all entries in the nlist to the active list.
          if (!pnb.d[p_pnb].at_least_one_lsh_changed) {
            /*active_list.insert(active_list.end(),                     
                               (pnb.d[p_pnb].n_list).begin(), (pnb.d[p_pnb].n_list).end());
            */
            uint n_list_it = pnb.n_list_hash.head(pnb.d[p_pnb].n_list_id);
            while (n_list_it != UINT_NULL) {
              active_list.push_back(pnb.n_list_hash.bit2c(n_list_it));
              n_list_it = pnb.n_list_hash.bit2right(n_list_it);
            }
            pnb.d[p_pnb].at_least_one_lsh_changed = true;
          }
          if (new_hash_code != t_hp->hash_code) {
            // both lsh and hash_code have changed.
            // move to the new location
            // add to the new location
            hash_t.l[cl][hash_t.l_loc_from_hash(new_hash_code, cl)].push_back(*itt);
            // update its hash_code
            //uint old_hash_code = (*itt)->hash_code;
            (t_hp)->hash_code = new_hash_code;
            // remove it here
            itt = (hash_t.l[cl][lid]).erase(itt);
          }else{
            ++ itt;
          }
        }
        //printf("end itt iter\n");
      }
    }
  }
  //inv.check_from_dfslist(num_items);

  //printf("merging hash buckets with the same lsh code in each column\n");
  //fflush(stdout);
  //check_non_empty_item2hp();
  merge_with_same_lsh_v2();
  //inv.check_from_dfslist(num_items);
  //hash_t.print_hash_table(inv);
  
  //printf("deal with active_list\n");
  //check_non_empty_item2hp();
  // deal with active_list
  sort(active_list.begin(), active_list.end());
  vector<uint>::iterator active_list_last = unique(active_list.begin(), \
                                                   active_list.end());
  vector<uint>::iterator active_elem;
  for (active_elem = active_list.begin(); active_elem != active_list_last;
       ++ active_elem) {
    //printf("begin process_active_elem, active_elem=%u\n", *active_elem);
    process_active_elem(*active_elem);
    //check_non_empty_item2hp();
    //printf("returned from process_active_elem\n");
  }
  //inv.check_from_dfslist(num_items);

  //printf("second time: merging hash buckets with the same lsh code in each column\n");
  //fflush(stdout);
  //check_non_empty_item2hp();
  merge_with_same_lsh_v2();
  
  //inv.check_from_dfslist(num_items);
  //hash_t.print_hash_table(inv);
  
  //printf("assign new_v to old_v\n");
  // move new_v to old_v
  for (it = hash_t.l[0].begin(); it != hash_t.l[0].end(); ++ it) {
    vector< uint >::iterator itt;
    for (itt = it->begin(); itt != it->end(); ++ itt) {
      //int rep_x = (*itt)->p_list_x;
      //int rep_y = (*itt)->p_list_y;
      //old_v[rep_x][rep_y] = new_v[rep_x][rep_y];
      //old_v[(*itt)->p_list] = new_v[(*itt)->p_list];
      HashPointer* t_hp = &(hash_t.hp.d[*itt]);
      assign_vals(new_v, pnb.d[t_hp->pnb].p_list, old_v, pnb.d[t_hp->pnb].p_list);
    }
  }
  //inv.check_from_dfslist(num_items);
}


// Re-files a single item whose neighbourhood values may have changed.
//
// The item's LSH codes are recomputed from new_v for all L columns and then
// one of three paths is taken, depending on the set the item belongs to:
//   * set of size 1: the set's single hash pointer per column is updated in
//     place (and re-inserted in the hash table when its hash code changed);
//   * larger set, and at least one column still has a hash pointer with the
//     item's new code: the item stays in the set, and columns that lack a
//     matching pointer get a new one;
//   * larger set, no column matches: the item is split off from its set and
//     either unioned into an existing PNBucket found through the hash table
//     or placed into a freshly created PNBucket.
void OneStep::process_active_elem(uint active_elem) {
  valueType vals[vals_len];
  grab_vals(active_elem, new_v, vals);
  
  uint root = inv.find_(inv.item2pd[active_elem]);
  uint root_item = inv.d_item(root);
  uint root_size = inv.d_size(root);
  
  // L consecutive codes of K ints each; column cl starts at offset cl*K.
  int active_elem_lsh[K*L];
  uint cl = 0;

  // calculate out lsh for later use; no need to calculate them many times.
  for (cl = 0; cl < L; ++ cl) {
    lsh.lsh(vals, cl, active_elem_lsh + cl*K);
  }
  
  if (root_size == 1) {
    for (cl = 0; cl < L; ++ cl) {
      // A singleton set is expected to own one hash pointer per column.  If
      // more are found, report it and discard all but the head of the list.
      if (item2hp_hash.more_than_one_elem(item2hp_id[root_item][cl])) {
        printf("process_active_elem:: item2hp[%u][%u].size() > 1\n", root_item, cl);
        uint bit = item2hp_hash.head(item2hp_id[root_item][cl]);
        bit = item2hp_hash.bit2right(bit);
        uint this_bit;
        while (bit != UINT_NULL) {
          uint it = item2hp_hash.bit2c(bit);
          HashPointer* hp_it = &(hash_t.hp.d[it]);
          hash_t.move_out(it, hp_it->lsh_hash_code, cl);
          hash_t.hp.free_elem(it);
          // Advance before deleting so the successor is read from a live node.
          this_bit = bit;
          bit = item2hp_hash.bit2right(bit);
          item2hp_hash.delete_(this_bit);
        }
      }
      // NOTE(review): the head is used without a UINT_NULL check; this
      // assumes every item always has at least one pointer per column.
      uint root_hp_bit = item2hp_hash.head(item2hp_id[root_item][cl]);
      uint root_hp_id = item2hp_hash.bit2c(root_hp_bit);
      HashPointer* root_hp = &(hash_t.hp.d[root_hp_id]);

      if (memcmp(active_elem_lsh + cl*K, root_hp->lsh_hash_code, sizeof(int)*K) != 0) {
        uint active_elem_hash_code = hash_t.hash_from_lsh(active_elem_lsh + cl*K);
        if (active_elem_hash_code != root_hp->hash_code) {
          // move_out must see the OLD lsh code, hence the memcpy comes last.
          hash_t.move_out(root_hp_id, root_hp->lsh_hash_code, cl);
          root_hp->hash_code = active_elem_hash_code;
          hash_t.insert(root_hp_id, active_elem_hash_code, cl);
        }
        memcpy(root_hp->lsh_hash_code, active_elem_lsh + cl*K, sizeof(int)*K);
      }
    }
  }else{ // root_size > 1
    // determine if pnb actually needs split: it does not if, in any column,
    // one of the set's hash pointers already carries the item's new code.
    bool pnb_need_split = true;
    for (cl = 0; cl < L; ++ cl) {
      uint root_hp_bit = item2hp_hash.head(item2hp_id[root_item][cl]);
      while (root_hp_bit != UINT_NULL) {
        HashPointer* root_hp = &(hash_t.hp.d[item2hp_hash.bit2c(root_hp_bit)]);
        if (memcmp(active_elem_lsh + cl*K, root_hp->lsh_hash_code, sizeof(int)*K) == 0){
          pnb_need_split = false;
          break;
        }
        root_hp_bit = item2hp_hash.bit2right(root_hp_bit);
      }

      if (!pnb_need_split)
        break;
    }

    if (pnb_need_split) {
      // Detach the item from its set.  new_pd.first is used below as the
      // detached item's own set handle, new_pd.second as the handle of what
      // remains of the original set.
      pair<uint,uint> new_pd = inv.delete_(inv.item2pd[active_elem], root);
                            // automatically update root_size
      
      // need update the item2bucket for root as well (since it may also change).
      uint new_root_item = inv.d_item(new_pd.second);
      // The per-column lists are still filed under the old representative, so
      // the owning PNBucket is looked up through root_item.
      uint new_root_hp_it = item2hp_hash.bit2c(item2hp_hash.head(item2hp_id[root_item][0]));
      uint pnb_id = hash_t.hp.d[new_root_hp_it].pnb;
      
      
      if (new_root_item != root_item) {
        // The representative changed: hand the per-column lists, the p_list
        // entry and the stored value over to the new representative.
        for (cl = 0; cl < L; ++ cl) {
          item2hp_hash.clear(item2hp_id[new_root_item][cl]);
          item2hp_hash.move_from_id_to_id(item2hp_id[root_item][cl], \
                                          item2hp_id[new_root_item][cl]);
        }
        pnb.d[pnb_id].p_list = new_root_item;
        assign_vals(new_v, root_item, new_v, new_root_item);
      }

      // take care of the n_list of the bucket the item just left
      move_out_neighbor_from_n_list(active_elem, &(pnb.d[pnb_id]));

      // determine if the split node can be added to other pnb: the first
      // column whose hash table already holds the item's code decides.
      uint pnb_to_add = UINT_NULL;
      for (cl = 0; cl < L; ++ cl) {
        uint hp_it = hash_t.find(active_elem_lsh + cl*K, cl);
        if (hp_it != UINT_NULL) {
          pnb_to_add = hash_t.hp.d[hp_it].pnb;
          break;
        }
      }

      uint active_elem_pd = new_pd.first;
      PNBucket* pn_it = NULL;
      if (pnb_to_add != UINT_NULL) {
        // merge into this bucket
        pn_it = &(pnb.d[pnb_to_add]);
        uint ori_pn_plist = pn_it->p_list;
        uint pn_pd = inv.item2pd[pn_it->p_list];
        uint pn_root = inv.union_(active_elem_pd, pn_pd);
        pn_it->p_list = inv.d_item(pn_root);

        if (pn_it->p_list != ori_pn_plist) {
          // The union changed the representative: move the per-column lists.
          for (cl = 0; cl < L; ++ cl) {
            item2hp_hash.clear(item2hp_id[pn_it->p_list][cl]);
            item2hp_hash.move_from_id_to_id(item2hp_id[ori_pn_plist][cl], \
                                            item2hp_id[pn_it->p_list][cl]);
          }
        }
        merge_neighbor_into_n_list(active_elem, pn_it);
        // Carry the bucket's value over to the (possibly new) representative.
        assign_vals(new_v, ori_pn_plist, new_v, pn_it->p_list);
      }else{
        // build a new pnb with the item as its only member;
        pnb_to_add = pnb.new_elem();
        pn_it = &(pnb.d[pnb_to_add]);
        pn_it->p_list = active_elem;
        merge_neighbor_into_n_list(active_elem, pn_it);
        // The item inherits the value stored for its former representative.
        assign_vals(new_v, root_item, new_v, active_elem);
        // Start from empty per-column lists for the new representative.
        // (Original author's open question: why is this clear needed here?)
        for (cl = 0; cl < L; ++ cl) {
          item2hp_hash.clear(item2hp_id[active_elem][cl]);
        }
      }

      // Make sure the target bucket is reachable through the item's code in
      // every column, creating hash pointers where none points at it yet.
      for (cl = 0; cl < L; ++ cl) {
        uint hp_it = hash_t.find(active_elem_lsh + cl*K, cl);
        if (hp_it == UINT_NULL || hash_t.hp.d[hp_it].pnb != pnb_to_add) {
          uint new_hp_id = hash_t.hp.new_elem();
          HashPointer* new_hp = &(hash_t.hp.d[new_hp_id]);

          memcpy(new_hp->lsh_hash_code, active_elem_lsh + cl*K, sizeof(int)*K);
          new_hp->hash_code = hash_t.hash_from_lsh(active_elem_lsh + cl*K);
          new_hp->pnb = pnb_to_add;
          hash_t.insert(new_hp_id, new_hp->hash_code, cl);
          item2hp_hash.insert_no_duplicate(item2hp_id[pn_it->p_list][cl], new_hp_id);
        }
      }
          
    }else{ // !pnb_need_split
      // The item stays in its set; add a hash pointer in every column where
      // none of the set's pointers carries the item's new code.
      for (cl = 0; cl < L; ++ cl) {
        bool need_new_bucket = true;
        // NOTE(review): pnb_id is read through the list head without a
        // UINT_NULL check; assumes the list is never empty here.
        uint root_hp_it = item2hp_hash.head(item2hp_id[root_item][cl]);
        uint pnb_id = hash_t.hp.d[item2hp_hash.bit2c(root_hp_it)].pnb;
        for ( ; root_hp_it != UINT_NULL; root_hp_it = item2hp_hash.bit2right(root_hp_it)) {
          HashPointer* root_hp = &(hash_t.hp.d[item2hp_hash.bit2c(root_hp_it)]);
          if (memcmp(active_elem_lsh + cl*K, root_hp->lsh_hash_code, sizeof(int)*K) == 0){
            need_new_bucket = false;
            break;
          }          
        }

        // (Original author's note: an additional hash_t.find() lookup was
        // judged unnecessary here because the scan above already covers it.)

        if (need_new_bucket) {
          uint new_hp_id = hash_t.hp.new_elem();
          HashPointer* new_hp = &(hash_t.hp.d[new_hp_id]);
          memcpy(new_hp->lsh_hash_code, active_elem_lsh + cl*K, sizeof(int)*K);
          new_hp->hash_code = hash_t.hash_from_lsh(active_elem_lsh + cl*K);
          new_hp->pnb = pnb_id;
          hash_t.insert(new_hp_id, new_hp->hash_code, cl);
          item2hp_hash.insert_no_duplicate(item2hp_id[root_item][cl], new_hp_id);
        }
      }
    }
  }
  
}


// Builds the shared state of a one-step evaluator.
//
// Sub-objects are constructed from the given sizes, the two value tables are
// allocated with value_table_size entries each (contents left uninitialised),
// and every item receives _L list ids from item2hp_hash, stored in
// item2hp_id[item] in column order.
OneStep::OneStep(uint _vals_len, uint _value_table_size, uint _num_items,
                 uint _K, uint _L, valueType lsh_r)
  : vals_len(_vals_len), value_table_size(_value_table_size),
    num_items(_num_items), K(_K), L(_L), 
    pnb(), hash_t(_K, _L),
    lsh(7, lsh_r, _K, _L, _vals_len), inv(_num_items),
    item2hp_hash(_num_items*_L), 
    item2hp_id(_num_items)
{
  printf("In onestep constructor\n");
  fflush(stdout);

  // Value tables for the previous and the next time step.
  old_v = new valueType[value_table_size];
  new_v = new valueType[value_table_size];

  // One hash-pointer list id per (item, column) pair, requested item by item
  // so the ids are handed out in the same order as before.
  uint item = 0;
  while (item < _num_items) {
    uint column = 0;
    while (column < _L) {
      const uint fresh_id = item2hp_hash.new_id();
      item2hp_id[item].push_back(fresh_id);
      ++column;
    }
    ++item;
  }

  printf("Exit onestep constructor\n");
  fflush(stdout);
}

// Releases the two value tables allocated with new[] in the constructor.
// All other members clean up through their own destructors.
OneStep::~OneStep() {
  delete [] new_v;
  delete [] old_v;
}



// Sets up a spinodal-decomposition stepper on a _size x _size grid.
//
// The base class is given _size*_size for both the value-table size and the
// item count.  The remaining arguments are stored as-is; h2 and h4 cache the
// second and fourth power of the grid spacing _h.
//
// NOTE(review): `vals_len` is passed to the base constructor before any
// member of this class is initialised; this is only sound if it names a
// static/compile-time constant -- confirm against the class declaration.
SpinodalDecompOneStep::SpinodalDecompOneStep(valueType _lsh_r, uint _size, uint _K,\
                                             uint _L, valueType _A, valueType _kappa,\
                                             valueType _M, valueType _dt, valueType _h) :
  OneStep(vals_len, _size*_size, _size*_size, _K, _L, _lsh_r),
  lsh_r(_lsh_r), size(_size), K(_K), L(_L), 
  // h4 is computed from the parameter instead of from member h2: members are
  // initialised in declaration order, not in the order written here, so
  // reading h2 would only be safe if it is declared before h4.
  A(_A), kappa(_kappa), M(_M), dt(_dt), h(_h), h2(_h*_h), h4((_h*_h)*(_h*_h))
{
  // A previous revision filled dx/dy here with the offsets -2..1 in both
  // directions; that initialisation is disabled.
  /*
  int k = 0;
  for (int i = -2; i < 2; ++ i)
    for (int j = -2; j < 2; ++ j){
      dx[k] = i;
      dy[k] = j;
      ++k;
    }
  */    
}


// Copies a single table entry: the value at index c_old of the first table is
// written to index c_new of the second table.  Both pointers may refer to the
// same table.
void SpinodalDecompOneStep::assign_vals(valueType* old_v, uint c_old,
                                        valueType* new_v, uint c_new) {
  const valueType* const source = old_v + c_old;
  valueType* const target = new_v + c_new;
  *target = *source;
}

void SpinodalDecompOneStep::grab_vals(uint item, valueType* value_table, \
                                      valueType* vals) {
  Coordinate2d c(0, 0);
  c.from_item(item, size);

  for (uint i = 0; i < vals_len; ++ i) {
    Coordinate2d cc(c);
    cc.x += dx[i];
    cc.y += dy[i];

    cc.x = max(cc.x, 0);      // smch: may be changed to mod operation 
    cc.x = min(cc.x, size-1);
    cc.y = max(cc.y, 0);
    cc.y = min(cc.y, size-1);

    uint this_item = cc.to_item(size);

    uint pd = inv.item2pd[this_item];
    uint root = inv.find_(pd);
    uint root_item = inv.d_item(root);

    vals[i] = value_table[root_item];
  }
}


// Returns the dot product of the first `len` entries of `a` and `b`,
// accumulated left to right starting from zero.
valueType inner_product(const valueType* a, const valueType* b, uint len) {
  valueType acc = 0;
  const valueType* const a_end = a + len;
  while (a != a_end) {
    acc += (*a) * (*b);
    ++a;
    ++b;
  }
  return acc;
}


// Computes the next value for item `c` from its stencil values and stores it,
// clamped to [0, 1], in new_v[c].
//
//   bulk[k] = 2*A*v*(1-v)*(1-2v)            for the first lap_len stencil values
//   r1      = <bulk, lapw>  / h2
//   r2      = kappa * <vals, laplapw> / h4   over all vals_len stencil values
//   next    = vals[0] + dt*M*(r1 - r2)
void SpinodalDecompOneStep::forward_one_step(valueType* vals, uint c,
                                             valueType* new_v) {
  // calc 2*A*c*(1-c)**2 - 2*A*c**2*(1-c) = 2*A*c*(1-c)*(1-2c)
  valueType bulk[lap_len];
  for (uint k = 0; k < lap_len; ++k) {
    const valueType v = vals[k];
    bulk[k] = 2*A*v*(1-v)*(1-2*v);
  }

  const valueType r1 = inner_product(bulk, lapw, lap_len) / h2;
  const valueType r2 = kappa*inner_product(vals, laplapw, vals_len) / h4;

  // Explicit update, then clamp: lower bound first, upper bound second.
  valueType next = vals[0] + dt*M*(r1 - r2);
  next = max(next, (valueType)0.0);
  next = min(next, (valueType)1.0);
  new_v[c] = next;
}


// Updates the neighbour list of bucket `t` after `item` has left the bucket's
// set.
//
// Step 1: every stencil neighbour of `item` that is currently in t's n_list
//         is re-examined; it is removed from the list when none of ITS stencil
//         neighbours belongs to t's set any more.
// Step 2: `item` itself is added to the n_list (unless already present) when
//         at least one of its stencil neighbours still belongs to t's set.
//
// Set membership is tested by comparing inv.find_() of a cell with the set
// handle stored for t->p_list.  All coordinates are clamped to [0, size-1].
void SpinodalDecompOneStep::move_out_neighbor_from_n_list(uint item, PNBucket* t) {
  Coordinate2d c(0, 0);
  c.from_item(item, size);

  uint root_item = t->p_list;
  uint root_pd = inv.item2pd[root_item];

  // Step 1: prune neighbours that no longer touch the set.
  for (uint i = 0; i < vals_len; ++ i) {
    Coordinate2d cc(c);
    cc.x += dx[i];
    cc.y += dy[i];

    cc.x = max(cc.x, 0);
    cc.x = min(cc.x, size-1);
    cc.y = max(cc.y, 0);
    cc.y = min(cc.y, size-1);

    // Skip the item itself (zero offset, or an offset clamped back onto it).
    if (cc.x == c.x && cc.y == c.y)
      continue;

    // Only cells currently recorded in the n_list are candidates for removal.
    uint cc_hash;
    uint cc_it = pnb.n_list_hash.find(t->n_list_id, cc.to_item(size), cc_hash);
    if (cc_it == UINT_NULL)
      continue;

    // check its neighbors: scan the whole stencil around cc.  The offsets
    // must be indexed by j here; indexing by i would test the very same cell
    // on every iteration and ignore all other neighbours of cc.
    bool clean_out = true;
    for (uint j = 0; j < vals_len; ++ j) {
      Coordinate2d c3(cc);
      c3.x += dx[j];
      c3.y += dy[j];

      c3.x = max(c3.x, 0);
      c3.x = min(c3.x, size-1);
      c3.y = max(c3.y, 0);
      c3.y = min(c3.y, size-1);

      if (inv.find_(inv.item2pd[c3.to_item(size)]) == root_pd) {
        clean_out = false;
        break;
      }
    }
    if (clean_out) {
      pnb.n_list_hash.delete_(cc_it);
    }
  }

  // Step 2: nothing to do when the item is already listed.  The find() call
  // also yields item_hash, which insert_() reuses below.
  uint item_hash;
  uint item_it = pnb.n_list_hash.find(t->n_list_id, item, item_hash);
  if (item_it != UINT_NULL)
    return ;
  
  bool add_in = false;
  for (uint i = 0; i < vals_len; ++ i) {
    Coordinate2d cc(c);
    cc.x += dx[i];
    cc.y += dy[i];

    cc.x = max(cc.x, 0);
    cc.x = min(cc.x, size-1);
    cc.y = max(cc.y, 0);
    cc.y = min(cc.y, size-1);

    if (cc.x == c.x && cc.y == c.y)
      continue;

    if (inv.find_(inv.item2pd[cc.to_item(size)]) == root_pd) {
      add_in = true;
      break;
    }    
  }  

  if (add_in){
    pnb.n_list_hash.insert_(t->n_list_id, item, item_hash);
  }  
}


void SpinodalDecompOneStep::merge_neighbor_into_n_list(uint item, \
                                                       PNBucket* t)  {
  Coordinate2d c(0, 0);
  c.from_item(item, size);

  uint root_item = t->p_list;
  uint root_pd = inv.item2pd[root_item];

  for (uint i = 0; i < vals_len; ++ i) {
    Coordinate2d cc(c);
    cc.x += dx[i];
    cc.y += dy[i];

    cc.x = max(cc.x, 0);
    cc.x = min(cc.x, size-1);
    cc.y = max(cc.y, 0);
    cc.y = min(cc.y, size-1);

    if (cc.x == c.x && cc.y == c.y)
      continue;
    
    uint this_item = cc.to_item(size);
    //if ((t->n_list).find(this_item) != (t->n_list).end())
    //  continue;
    // this does not need to be implemented; because we automatically ensure no duplication.
    
    uint pd = inv.item2pd[this_item];

    if (inv.find_(pd) != root_pd) {
      //(t->n_list).insert(this_item);
      pnb.n_list_hash.insert_no_duplicate(t->n_list_id, this_item);
    }
  }

  //set<uint>::iterator it = (t->n_list).find(item);
  //if (it != (t->n_list).end())
  //  (t->n_list).erase(it);
  uint item_hash;
  uint it = pnb.n_list_hash.find(t->n_list_id, item, item_hash);
  if (it != UINT_NULL)
    pnb.n_list_hash.delete_(it);
}


// Encodes a size x size image into the bucket / hash-table structures.
//
// img: indexed as img[x][y] for 0 <= x, y < size; values are copied to old_v.
//
// Steps:
//   1. copy every pixel into old_v (indexed by flattened item id);
//   2. make every item its own singleton set in inv;
//   3. visit the items in random order; for each item, look up its LSH code in
//      every column cl of hash_t. On the first hit the item is unioned into
//      the PNBucket that the matching hash pointer refers to; otherwise a new
//      PNBucket is created with the item as its p_list;
//   4. for every column where no hash pointer for that bucket was found, a
//      new hash pointer is created and registered.
void SpinodalDecompOneStep::encode_from_img(valueType **img) {
  // old_v
  Coordinate2d c(0, 0);
  for (c.x = 0; c.x < size; ++c.x) {
    for (c.y = 0; c.y < size; ++c.y) {
      uint item = c.to_item(size);
      old_v[item] = img[c.x][c.y];
    }
  }

  // inv
  uint usize = (uint)size;
  uint size2 = usize*usize;
  for (uint i = 0; i < size2; ++ i)
    inv.makeset(i);

  //inv.check_from_dfslist(size2);

  // per-item scratch buffers: neighborhood values and one LSH code
  valueType vals[vals_len];
  int item_lsh[K];
  uint item_k = 0, item = 0;

  // Random visiting order. Kept on the heap: a stack array of size2 entries
  // is a non-standard variable-length array and overflows the stack for
  // large images.
  std::vector<uint> item_vec(size2);
  for (item_k = 0; item_k < size2; ++ item_k)
    item_vec[item_k] = item_k;
  random_shuffle(item_vec.begin(), item_vec.end());

  for (item_k = 0; item_k < size2; ++ item_k) {
    item = item_vec[item_k];
    //printf("encode item=%u\n", item);
    //fflush(stdout);

    grab_vals(item, old_v, vals);

    uint cl = 0;
    // determine if can add to other pnb;
    uint pnb_to_add = UINT_NULL;
    for (cl = 0; cl < L; ++ cl) {
      lsh.lsh(vals, cl, item_lsh);

      uint hp_it = hash_t.find(item_lsh, cl);
      if (hp_it != UINT_NULL) {
        pnb_to_add = hash_t.hp.d[hp_it].pnb;
        break;
      }
    }

    uint item_pd = inv.item2pd[item];
    PNBucket* pn_it = NULL;
    if (pnb_to_add != UINT_NULL) {
      // merge into this bucket
      pn_it = &(pnb.d[pnb_to_add]);
      uint ori_pn_plist = pn_it->p_list;
      uint pn_pd = inv.item2pd[pn_it->p_list];
      uint root = inv.union_(item_pd, pn_pd);
      pn_it->p_list = inv.d_item(root);

      if (pn_it->p_list != ori_pn_plist) {
        // The bucket's p_list item changed: move the per-column hash-pointer
        // lists from the old p_list item to the new one.
        for (cl = 0; cl < L; ++ cl) {
          item2hp_hash.clear(item2hp_id[pn_it->p_list][cl]);
          item2hp_hash.move_from_id_to_id(item2hp_id[ori_pn_plist][cl], \
                                          item2hp_id[pn_it->p_list][cl]);
        }
      }
      // no need to update old_v (it is their accurate value).
      merge_neighbor_into_n_list(item, pn_it);
    }else{
      // no column matched: open a fresh bucket with this item as its p_list
      pnb_to_add = pnb.new_elem();
      pn_it = &(pnb.d[pnb_to_add]);
      pn_it->p_list = item;
      //assert((pn_it->n_list).size() == 0);
      merge_neighbor_into_n_list(item, pn_it);
    }

    // Make sure every column has a hash pointer with this item's LSH code
    // that points at the chosen bucket.
    for (cl = 0; cl < L; ++ cl) {
      lsh.lsh(vals, cl, item_lsh);

      uint hp_it = hash_t.find(item_lsh, cl);
      if (hp_it == UINT_NULL || hash_t.hp.d[hp_it].pnb != pnb_to_add) {
        uint hp_id = hash_t.hp.new_elem();
        // named distinctly from the lookup result above (no shadowing)
        HashPointer* new_hp = &(hash_t.hp.d[hp_id]);
        memcpy(new_hp->lsh_hash_code, item_lsh, sizeof(int)*K);
        new_hp->hash_code = hash_t.hash_from_lsh(item_lsh);
        new_hp->pnb = pnb_to_add;
        hash_t.insert(hp_id, new_hp->hash_code, cl);
        //item2hp[pn_it->p_list][cl].insert(hp_id);
        item2hp_hash.insert_no_duplicate(item2hp_id[pn_it->p_list][cl], hp_id);
      }
    }

    if (item % size == 0) {
      printf("after processing %u items\n", item);
      //hash_t.print_hash_table(inv);
    }
  }
  inv.check_from_dfslist(size2);
}

// Builds a freshly allocated size x size image in which every pixel carries
// the old_v value of the item that represents its set in inv.
// The rows and the row table are allocated with new[] and are not released
// here (presumably the caller is expected to free them -- confirm at call sites).
valueType** SpinodalDecompOneStep::decode_to_img() {
  const uint n_rows = (uint)size;

  // row table first, then one array per row
  valueType** out = new valueType*[size];
  uint r = 0;
  while (r < n_rows) {
    out[r] = new valueType[size];
    ++ r;
  }

  Coordinate2d pos(0, 0);
  for (pos.x = 0; pos.x < size; ++ pos.x) {
    for (pos.y = 0; pos.y < size; ++ pos.y) {
      // pixel -> item -> pd -> set root -> item standing for that root
      uint pd = inv.item2pd[pos.to_item(size)];
      uint rep_item = inv.d_item(inv.find_(pd));
      out[pos.x][pos.y] = old_v[rep_item];
    }
  }

  return out;
}


//const valueType SpinodalDecompOneStep::lsh_r = 1e-4;
  
// Neighbor offsets, paired index by index as (dx[i], dy[i]); 13 entries:
//   0     : (0,0), the cell itself
//   1..4  : the four axis neighbors at distance 1
//   5..8  : the four diagonal neighbors
//   9..12 : the four axis neighbors at distance 2
const int SpinodalDecompOneStep::dx[] = {0, 1, 0,-1, 0, 1,-1, 1,-1, 2, 0,-2, 0};
const int SpinodalDecompOneStep::dy[] = {0, 0, 1, 0,-1, 1, 1,-1,-1, 0, 2, 0,-2};
// 13 weights, one per offset above.
// NOTE(review): the values look like the 13-point Laplacian-of-Laplacian
// (biharmonic) stencil -- confirm against where laplapw is used.
const valueType SpinodalDecompOneStep::laplapw[] = {20,-8,-8,-8,-8,2,2,2,2,1,1,1,1};
// 5 weights.
// NOTE(review): presumably applied to offsets 0..4 (cell + distance-1 axis
// neighbors) as a 5-point Laplacian -- confirm against where lapw is used.
const valueType SpinodalDecompOneStep::lapw[] = {-4, 1, 1, 1, 1};


//
void OneStep::check_non_empty_item2hp() {
  for (uint item = 0; item < num_items; ++ item) {
    uint root = inv.d_item(inv.find_(inv.item2pd[item]));
    for (uint cl = 0; cl < L; ++ cl) {
      //printf("check_non_empty_item2hp: item=%u, root=%u, cl=%u, EMPTY!\n", item, \
              root, cl);
      //fflush(stdout);
      if (item2hp_hash.head(item2hp_id[root][cl]) == UINT_NULL) {
        printf("Error check_non_empty_item2hp: item=%u, root=%u, cl=%u, EMPTY!\n", item,\
               root, cl);
        fflush(stdout);
      }
      assert(item2hp_hash.head(item2hp_id[root][cl]) != UINT_NULL);

      uint bit = item2hp_hash.head(item2hp_id[root][cl]);
      while (bit != UINT_NULL) {
        uint pnb_it = hash_t.hp.d[item2hp_hash.bit2c(bit)].pnb;
        if (pnb.d[pnb_it].p_list != root) {
          printf("Error check_non_empty_item2hp: item=%u, root=%u, cl=%u, pnb_it=%u, p_list=%u ERROR!\n", item, root, cl, pnb_it, (pnb.d[pnb_it].p_list));
        }
        assert(pnb.d[pnb_it].p_list == root);
        bit = item2hp_hash.bit2right(bit);  
      }
    }
  }
}


//
// Writes the PNBuckets reachable from column 0 of the hash table to
// `bucket_file` (opened for writing, so existing content is replaced).
//
// Output format: "bl=[" followed by one "[...]" group per distinct bucket,
// separated by ", ", and closed by "]\n". The content of each group is
// produced by inv.print_recursive_tree_items, started at the pd of the
// bucket's p_list item.
//
// If the file cannot be opened, an error is reported on stderr and nothing
// is written.
void OneStep::print_PNBuckets_to_file(const char* bucket_file) {
  FILE* oup = fopen(bucket_file, "w");
  if (oup == NULL) {
    // fopen failed: writing through a null FILE* would crash
    fprintf(stderr, "Error print_PNBuckets_to_file: cannot open %s for writing\n",
            bucket_file);
    return;
  }

  fprintf(oup, "bl=[");
  bool first = true;

  // buckets already printed; several hash pointers may share one bucket
  set<uint> pnb_it_set;

  vector< vector< uint > >::iterator it;
  for (it = hash_t.l[0].begin(); it != hash_t.l[0].end(); ++ it) {
    vector< uint >::iterator itt = it->begin();
    for ( ; itt != it->end(); ++ itt) {
      uint pnb_it = hash_t.hp.d[*itt].pnb;
      // insert() reports whether the id was new; skip repeated buckets
      if (!pnb_it_set.insert(pnb_it).second)
        continue;

      fputs(first ? "[" : ", [", oup);
      first = false;

      uint root = (inv.item2pd[pnb.d[pnb_it].p_list]);
      inv.print_recursive_tree_items(root, root, oup);
      fprintf(oup, "]");
    }
  }

  fprintf(oup, "]\n");
  fclose(oup);
}


// Below is the implementation from Chonghao

/*
// smch: the memory for vals is assumed to be allocated before calling this function
// smch: in the future this will use grab_pixel to get the pixels' coordinates
void OneStep::grab_vals(int x, int y, valueType **value_table, valueType *vals, int order) {
    int size = sizeof value_table / sizeof value_table[0];
    int max_h = size - 1;
    int max_w = size - 1;
    if (order == 1) {
        int x__1 = x - 1, x_1 = x + 1;
        int y__1 = y - 1, y_1 = y + 1;
        if (x == 0) {
            x__1 = max_h;
        }
        else if (x == max_h) {
            x_1 = 0;
        }
        if (y == 0) {
            y__1 = max_w;
        }
        else if (y == max_w) {
            y_1 = 0;
        }
        // use x__1, x, x_1, y__1, y, y_1 to find the representative in inverse table inv
    }
    else if (order == 2) {
        int x__2 = x - 2, x__1 = x - 1, x_1 = x + 1, x_2 = x + 2;
        int y__2 = y - 2, y__1 = x - 1, y_1 = y + 1, y_2 = y + 2;
        switch (x) {
            case 0: {
                break;
            }
            case 1: {
                break;
            }
            default: {
                if (x == max_h) {

                }
                else if (x == (max_h - 1)) {

                }
                break;
            }
        }
        switch (y) {
            case 0: {
                break;
            }
            case 1: {
                break;
            }
            default: {
                if (y == max_w) {

                }
                else if (y == (max_w - 1)) {

                }
                break;
            }
        }
        // use x__2 ... to grab values from inverse table and hash table
    } else {
        printf("wrong order\n");
    }
}

// the vector is assumed to be initialized before calling this function
// grab_vals has similar logic, so grab_vals will call grab_pixel to get the pixel coordinates.
void OneStep::grab_pixel(int x, int y, int size, vector <Coordinate> pixel_list, int order) {
    int max_h = size - 1;
    int max_w = size - 1;
    if (order == 1) {
        int x__1 = x - 1, x_1 = x + 1;
        int y__1 = y - 1, y_1 = y + 1;
        if (x == 0) {
            x__1 = max_h;
        }
        else if (x == max_h) {
            x_1 = 0;
        }
        if (y == 0) {
            y__1 = max_w;
        }
        else if (y == max_w) {
            y_1 = 0;
        }
        // use x__1, x, x_1, y__1, y, y_1 to construct neighbors' coordinate
    }
    else if (order == 2) {
        int x__2 = x - 2, x__1 = x - 1, x_1 = x + 1, x_2 = x + 2;
        int y__2 = y - 2, y__1 = x - 1, y_1 = y + 1, y_2 = y + 2;
        switch (x) {
            case 0: {
                break;
            }
            case 1: {
                break;
            }
            default: {
                if (x == max_h) {

                }
                else if (x == (max_h - 1)) {

                }
                break;
            }
        }
        switch (y) {
            case 0: {
                break;
            }
            case 1: {
                break;
            }
            default: {
                if (y == max_w) {

                }
                else if (y == (max_w - 1)) {

                }
                break;
            }
        }
        // use x__2 ... to construct neighbors' coordinate
    } else {
        printf("wrong order\n");
    }
}

*/
